; Number of fractional bits used for the fixed-point x coordinates and
; per-scanline x steps in stencil_tri (values are scaled by 1 shl ROUND2).
ROUND2 equ 10

stencil_tri:
;  Rasterise one triangle into the s-buffer, one scanline at a time.
;  The vertices are sorted by ascending y, the per-scanline x and z steps
;  of the three edges are computed with SSE, and the horizontal line
;  procedure whose address is passed in xmm5 is called once per row:
;    rows y1 .. y2-1  between edge 1-3 and edge 1-2,
;    rows y2 .. y3-1  between edge 1-3 and edge 2-3.
;
;  ----------------in - eax - x1 shl 16 + y1 ----------------------
;  -------------------- ebx - x2 shl 16 + y2 ----------------------
;  -------------------- ecx - x3 shl 16 + y3 ----------------------
;  -------------------- esi - pointer to s-buffer -----------------
;  -------------------- edx - x_res shl 16 + y_res ---------------
;  -------------------- xmm0 - lo -> hi z1, z2, z3 as dword float
;---------------------- xmm1 - lo -> hi y_min, y_max, x_min, x_max
;                              (stored as-is and forwarded to the line
;                               procedure in xmm5 on every call)
;---------------------- xmm5 - horizontal line proc address (low dword)
;
;  Coordinates are treated as signed 16-bit values.
;  Modifies: eax, ebx, ecx, edx, esi, xmm0-xmm5, plus whatever SSE
;  registers the line procedure changes (its general registers are
;  protected by pushad/popad around each call).

; The three vertex dwords are pushed right after the frame is set up, so
; the packed coordinates are addressable as words relative to ebp.
.x1     equ [ebp-2]
.y1     equ [ebp-4]
.x2     equ [ebp-6]
.y2     equ [ebp-8]
.x3     equ [ebp-10]
.y3     equ [ebp-12]
.hlp1   equ [ebp-14]    ; 2 bytes below .y3: start address for the shifted 16-byte load of the y words

; Edge steps. The SIMD code writes dx23,dx13,dx12 and dz23,dz13,dz12 as
; consecutive dwords, so these offsets must stay contiguous and in order.
.dx12   equ dword[ebp-20]
.dx13   equ dword[ebp-24]
.dx23   equ [ebp-28]
.dz12   equ dword[ebp-32]
.dz13   equ [ebp-36]
.dz23   equ [ebp-40]
.zz2    equ [ebp-44]    ; current z on the second edge (1-2, then 2-3)
.zz1    equ [ebp-48]    ; current z on edge 1-3; zz1/zz2 are loaded together as 8 bytes
.z3     equ [ebp-56]
.z2     equ [ebp-60]
.z1     equ [ebp-64]
.s_buff equ [ebp-68]
.x_res  equ word[ebp-70]
.y_res  equ [ebp-72]
.cal_line equ dword[ebp-76]
.xmax     equ [ebp-80]
.xmin     equ [ebp-84]
.ymax     equ [ebp-88]
.ymin     equ [ebp-92]   ; ymin ymax xmin xmax

        push    ebp
        mov     ebp,esp

        ; Sort the vertices by ascending y (low word of each register),
        ; swapping the matching z lanes in xmm0 along with them.
      .sort2:
        cmp     ax,bx
        jle     .sort1
        xchg    eax,ebx
        shufps  xmm0,xmm0,11100001b     ; swap z lanes 0 and 1
      .sort1:
        cmp     bx,cx
        jle     .sort3
        xchg    ebx,ecx
        shufps  xmm0,xmm0,11011000b     ; swap z lanes 1 and 2
        jmp     .sort2                  ; vertex 2 changed - recheck 1 vs 2
      .sort3:
        push    eax ebx ecx             ; [ebp-4]=x1:y1  [ebp-8]=x2:y2  [ebp-12]=x3:y3

        ; Locals reach down to [ebp-92] (80 bytes below the pushed
        ; vertices).  Reserve 92 rather than 90 so that esp stays dword
        ; aligned for the pushad / call / popad executed on every scanline.
        sub      esp,92
        mov     .y_res,edx
        movss   .cal_line,xmm5          ; low dword of xmm5 = line procedure address
        movups  .ymin,xmm1              ; clip rectangle: ymin ymax xmin xmax

        movups   .z1,xmm0               ; z1, z2, z3 (+ unused 4th lane at [ebp-52])
        movaps   xmm1,xmm0              ; keep the z values for the dz computation
        mov      .s_buff,esi

        ; ---- integer edge deltas -----------------------------------
        ; Two overlapping 16-byte loads of the pushed vertices, two bytes
        ; apart, put the x words (xmm0) and the y words (xmm3) into the
        ; high half of each dword; psrld moves them down to the low half.
        ; Lane 3 holds unrelated stack bytes and is never used.
        xorps     xmm2,xmm2
        xorps     xmm4,xmm4
        movups    xmm0,.y3
        movups    xmm3,.hlp1
        psrld     xmm0,16               ; xmm0 = x3, x2, x1, (junk)  zero-extended
        psrld     xmm3,16               ; xmm3 = y3, y2, y1, (junk)  zero-extended


        ; Sign-extend the 16-bit values to 32 bits: build 0FFFFh in the
        ; low word of every negative lane, shift the mask up by one word
        ; and OR it into the (zero) high word.
        pcmpgtw   xmm4,xmm0
        pcmpgtw   xmm2,xmm3

        pslldq    xmm4,2
        pslldq    xmm2,2

        orps      xmm0,xmm4
        orps      xmm3,xmm2

        movaps    xmm2,xmm0
        movaps    xmm4,xmm3

        shufps    xmm0,xmm0,11101001b   ; x2, x1, x1, junk
        shufps    xmm2,xmm2,11010000b   ; x3, x3, x2, junk
        psubd     xmm2,xmm0             ; x3-x2, x3-x1, x2-x1, 0
        cvtdq2ps  xmm2,xmm2


        shufps    xmm3,xmm3,11101001b   ; y2, y1, y1, junk
        shufps    xmm4,xmm4,11010000b   ; y3, y3, y2, junk
        psubd     xmm4,xmm3             ; y3-y2, y3-y1, y2-y1, 0
        cvtdq2ps  xmm4,xmm4

        ; ---- per-scanline steps ------------------------------------
        mov       eax,1 shl ROUND2
        cvtsi2ss  xmm5,eax
        shufps    xmm5,xmm5,0           ; broadcast the fixed-point scale
        mulps     xmm2,xmm5

        rcpps     xmm4,xmm4             ; approximate 1/dy for each edge
        mulps     xmm2,xmm4             ; dx * 2^ROUND2 / dy
                                        ; A lane with dy = 0 yields inf/NaN; the loop
                                        ; that would use it is skipped by the y
                                        ; comparisons below.

        cvtps2dq  xmm2,xmm2             ; fixed-point dx23, dx13, dx12



        movaps    xmm0,xmm1
        shufps    xmm0,xmm0,11011010b   ; z3, z3, z2, w
        shufps    xmm1,xmm1,11000001b   ; z2, z1, z1, w
        subps     xmm0,xmm1             ; z3-z2, z3-z1, z2-z1, 0
        mulps     xmm0,xmm4             ; dz23, dz13, dz12 per scanline
        movups    .dz23,xmm0            ; 4th lane spills into .dx23 ...

        movups    .dx23,xmm2            ; ... which is why dx is stored afterwards
                                        ; (its own 4th lane lands at [ebp-16], unused)

    .dx23_done:
        ; Both edges of the upper half start at vertex 1.
        mov     edx,.z1

        mov     .zz1,edx
        mov     .zz2,edx
        movlps  xmm0,.zz1               ; xmm0 lo -> hi = zz1, zz2
        movsx   eax,word .x1
        shl     eax,ROUND2    ; eax - cur x1
        mov     ebx,eax       ; ebx - cur x2
        mov     edx,.y_res
        mov     esi,.s_buff

        movsx   ecx,word .y1
        cmp     cx,.y2
        jge     .loop1_end              ; flat top: no upper half
     .loop1:

        ; Line procedure arguments: eax,ebx = integer x on both edges,
        ; ecx = y, edx = packed resolution, esi = buffer,
        ; xmm0 = zz1,zz2, xmm5 = clip rectangle.
        pushad
        sar     ebx,ROUND2
        sar     eax,ROUND2
        movups  xmm5,.ymin

        call    .cal_line  ;stencil_line

        popad
        add     eax,.dx13
        add     ebx,.dx12


        movlps  xmm0,.zz1               ; zz1, zz2
        movlps  xmm2,.dz13              ; dz13, dz12 (adjacent in memory)
        addps   xmm0,xmm2
        movlps  .zz1,xmm0               ; xmm0 stays loaded for the next call

        inc     ecx
        cmp     cx,.y2
        jl      .loop1

    .loop1_end:

        ; Lower half: the second edge restarts at vertex 2, while edge 1-3
        ; (eax, zz1) simply continues.
        mov     edx,.z2
        mov     .zz2,edx
        movsx   ebx,word .x2
        shl     ebx,ROUND2
        mov     edx,.y_res
        movlps  xmm0,.zz1
        movsx   ecx,word .y2
        cmp     cx,.y3
        jge     .loop2_end              ; flat bottom: no lower half
     .loop2:
        pushad

        sar     ebx,ROUND2
        sar     eax,ROUND2

        mov     esi,.s_buff
        movups  xmm5,.ymin

        call    .cal_line  ;stencil_line

        popad
        add     eax,.dx13
        add     ebx,.dx23

        ; dz13 and dz23 are not adjacent in memory (dz13 sits between
        ; dz23 and dz12), so the wanted sums are formed in lanes 0 and 2
        ; and then packed into lanes 0 and 1.
        movlps  xmm0,.zz1               ; zz1,  zz2
        movhps  xmm0,.zz2               ; zz1,  zz2,  zz2,  dz23
        movlps  xmm3,.dz13              ; dz13, dz12
        movhps  xmm3,.dz23              ; dz13, dz12, dz23, dz13
        addps   xmm0,xmm3
        shufps  xmm0,xmm0,11111000b     ; lane0 = zz1+dz13, lane1 = zz2+dz23
        movlps  .zz1,xmm0


        inc     ecx
        cmp     cx,.y3
        jl      .loop2
     .loop2_end:

        mov     esp,ebp
        pop     ebp

ret

stencil_line:
;----------------------------------------------------
; Draw one horizontal span into the z-buffer: z is interpolated linearly
; from z1 at x1 to z2 at x2, and a buffer cell is overwritten only when
; the new z is strictly less than the stored value.
;
;-------------in xmm0 - lo -> hi z1, z2 -------------
;--------------- xmm5 - lo -> hi y_min, y_max, ------
;--------------- x_min, x_max (compared as dword integers)
;--------------- eax - x1 ---------------------------
;--------------- ebx - x2 ---------------------------
;--------------- ecx - y-----------------------------
;--------------- edx - x_res shl 16 + y_res ---------
;--------------- esi - pointer to z-buffer ----------
;
; The buffer is addressed as dword floats, x_res cells per row:
;   cell address = esi + (y * x_res + x) * 4
; Clipping: rows outside y_min <= y < y_max are skipped; the span is
; trimmed to max(x1,x_min) .. min(x2,x_max)-1.  The end points may be
; passed in either order; an empty span (x1 = x2) draws nothing.
;
; May modify: eax, ebx, ecx, edx, esi, xmm0, xmm1, xmm2, xmm6.


.x_res  equ word[ebp-2]
.y_res  equ [ebp-4]
.x1     equ [ebp-8]
.x2     equ [ebp-12]
.z2     equ [ebp-16]
.z1     equ [ebp-20]

; The clip rectangle is stored with one 16-byte write starting at .y_min,
; so these four slots must stay contiguous and in this order.
.x_max  equ [ebp-24]
.x_min  equ [ebp-28]
.y_max  equ [ebp-32]
.y_min  equ [ebp-36]





     push       ebp
     mov        ebp,esp

     sub        esp,48
     movups     .y_min,xmm5
     mov        .y_res,edx              ; low word = y_res, high word = .x_res

     ; Reject rows outside the vertical clip range.
     cmp         ecx,.y_min
     jl         .l_quit
     cmp         ecx,.y_max  ;.y_res
     jge        .l_quit

     ; Order the end points left to right, keeping z1/z2 paired with them.
     cmp        eax,ebx
     je         .l_quit
     jl         .l_ok

     xchg       eax,ebx
     shufps     xmm0,xmm0,11100001b     ; swap z1 and z2
   .l_ok:

     ; Reject spans lying entirely outside the horizontal clip range.
     cmp        eax,.x_max
     jge        .l_quit
     cmp         ebx,.x_min
     jle        .l_quit

     ; dz per pixel = (z2 - z1) * approx(1 / (x2 - x1)).
     ; Only lane 0 of xmm1/xmm2/xmm6 carries meaningful data.
     mov        .x1,eax
     mov        .x2,ebx
     movaps     xmm2,xmm0               ; xmm2 lane 0 = current z
     sub        ebx,eax
     movaps     xmm6,xmm2
     shufps     xmm6,xmm6,11111101b     ; lane 0 = z2
     cvtsi2ss   xmm1,ebx
     subps      xmm6,xmm0  ;.z1
     rcpps      xmm1,xmm1
     mulps      xmm6,xmm1               ; xmm6 lane 0 = dz per pixel
     mov        edx,.x_min
     cmp        eax,edx   ;.x_min
     jge        @f

     ; Left end is clipped: advance z by the number of skipped pixels.
     mov        eax,edx  ;.x_min
     sub        eax,.x1
     cvtsi2ss   xmm0,eax
     mulps      xmm0,xmm6
     addps      xmm2,xmm0
     mov        .x1,edx
   @@:
     mov        edx,.x_max
     mov        ebx,.x2
     cmp        ebx,edx
     cmovnl     ebx,edx                 ; ebx = min(x2, x_max)
  ;   mov        .x2,ebx


     ; esi -> first cell of the span.
     movzx      edx,.x_res
     mov        eax,ecx
     mul        edx
     add        eax,.x1

     shl        eax,2
     add        esi,eax

     mov        ecx,ebx ;.x2
     sub        ecx,.x1                 ; ecx = number of cells to process
     jle        .l_quit                 ; only possible when x_min >= x_max; without this
                                        ; guard a zero/negative count would make the
                                        ; loop below run far past the buffer
   .ccalc:
     ; Store only when new z < stored z.  comiss sets CF or ZF for
     ; "less", "equal" and "unordered" (NaN), so jbe skips in all of
     ; those cases and the store happens for strict "stored > new" only.
     movss     xmm1,dword[esi]
     comiss    xmm1,xmm2
     jbe       @f
     movss     dword[esi],xmm2
    @@:
     add       esi,4
     addps     xmm2,xmm6
     dec       ecx
     jnz       .ccalc

  .l_quit:
     mov       esp,ebp
     pop       ebp
ret







